Introducción a Pandas


In [3]:
from IPython.display import IFrame 
IFrame('http://pandas.pydata.org/', width=1000, height=350)


Out[3]:

In [4]:
import pandas as pd
import numpy as np

Cómo cargar y guardar datos desde diferentes fuentes ?


In [5]:
df_txt = pd.read_csv("data/medidas.txt", header=None, parse_dates=[1], keep_date_col=True)
#df_txt = pd.read_csv('data/all_test.txt', sep='\t', header=None)
df_txt


Out[5]:
0 1 2
0 8 2013-11-28 11:00:00 0
1 15 2013-11-28 11:00:00 692
2 16 2013-11-28 11:00:00 367
3 17 2013-11-28 11:00:00 0
4 18 2013-11-28 11:00:00 0
5 20 2013-12-01 09:30:00 71
6 6 2013-12-05 15:45:00 0
7 5 2013-11-18 22:15:00 29705
8 6 2013-11-18 22:15:00 15364
9 7 2013-11-18 22:15:00 41482
10 8 2013-11-18 22:15:00 52487
11 9 2013-11-18 22:15:00 0
12 10 2013-11-18 22:15:00 0
13 13 2013-11-18 22:15:00 22792
14 14 2013-11-18 22:15:00 18182
15 15 2013-11-18 22:15:00 0
16 16 2013-11-18 22:15:00 0
17 17 2013-11-18 22:15:00 22273
18 18 2013-11-18 22:15:00 31488
19 7 2013-11-18 22:30:00 64266
20 8 2013-11-18 22:30:00 41479
21 9 2013-11-18 22:30:00 0
22 10 2013-11-18 22:30:00 0
23 13 2013-11-18 22:30:00 14344
24 14 2013-11-18 22:30:00 11270
25 15 2013-11-18 22:30:00 0
26 16 2013-11-18 22:30:00 0
27 17 2013-11-18 22:30:00 20481
28 18 2013-11-18 22:30:00 29696
29 5 2013-11-18 22:30:00 24585
... ... ... ...
129691 20 2014-02-09 12:45:00 105
129692 12 2014-02-09 11:30:00 0
129693 9 2014-02-09 11:30:00 0
129694 10 2014-02-09 11:30:00 0
129695 9 2014-02-09 11:45:00 0
129696 10 2014-02-09 11:45:00 0
129697 7 2014-02-09 12:00:00 0
129698 8 2014-02-09 12:00:00 0
129699 5 2014-02-09 12:00:00 0
129700 6 2014-02-09 12:00:00 0
129701 15 2014-02-09 12:00:00 598
129702 16 2014-02-09 12:00:00 304
129703 19 2014-02-09 12:00:00 187
129704 20 2014-02-09 12:00:00 98
129705 17 2014-02-09 12:00:00 0
129706 18 2014-02-09 12:00:00 0
129707 9 2014-02-09 12:15:00 0
129708 10 2014-02-09 12:15:00 0
129709 5 2014-02-09 12:30:00 0
129710 6 2014-02-09 12:30:00 0
129711 15 2014-02-09 12:30:00 616
129712 16 2014-02-09 12:30:00 309
129713 19 2014-02-09 12:30:00 199
129714 20 2014-02-09 12:30:00 104
129715 17 2014-02-09 12:30:00 0
129716 18 2014-02-09 12:30:00 0
129717 11 2014-02-09 12:30:00 0
129718 12 2014-02-09 12:30:00 0
129719 23 2014-02-09 12:45:00 338
129720 24 2014-02-09 12:45:00 227

129721 rows × 3 columns


In [6]:
df_txt.shape


Out[6]:
(129721, 3)

In [7]:
df_other_excel = pd.read_excel('data/test.xls', 'Sheet0') # ver que sucede cuando ponemos header None
#print df_other_excel.columns
df_other_excel['Latitud'].apply(lambda x: x.replace(',', '.'))


Out[7]:
0               -38.73008
1               -38.70583
2               -38.74588
3               -38.71933
4               -38.71498
5               -38.72875
6               -38.74818
7               -38.72875
8               -38.71425
9               -38.69112
10              -38.70475
11              -38.75256
12              -38.70533
13      -38.6940002441406
14              -38.67708
15              -38.68735
16              -38.71095
17              -38.71096
18              -38.72744
19              -38.69223
20              -38.74731
21              -38.75501
22              -38.72717
23              -38.70318
24              -38.71692
25              -38.73853
26              -38.70213
27              -38.72409
28              -38.75764
29               -38.7292
              ...        
6330            -38.70954
6331            -38.71064
6332            -38.78465
6333            -38.70511
6334            -38.72084
6335            -38.70983
6336            -38.69252
6337             -38.7248
6338            -38.71496
6339            -38.73507
6340            -38.69466
6341            -38.70844
6342            -38.73331
6343            -38.70589
6344            -38.72077
6345            -38.71576
6346            -38.77082
6347            -38.71301
6348            -38.73957
6349            -38.77046
6350            -38.70701
6351            -38.71933
6352            -38.75234
6353            -38.71004
6354            -38.74628
6355    -38.7385559082031
6356            -38.72046
6357            -38.71361
6358            -38.71211
6359            -38.71972
Name: Latitud, dtype: object

In [8]:
df_other_excel.shape


Out[8]:
(6360, 11)

In [9]:
df_html = pd.read_html('data/lista_landmark.html')
df_html


Out[9]:
[       0                     1                           2
 0   Type                 Value                        Text
 1    NaN                   NaN              Fiducials List
 2    NaN    6.2, 128.7, -580.1       Espina nasal anterior
 3    NaN    7.2, 154.8, -570.5               Infraposnasal
 4    NaN   10.4, 121.4, -576.3                   Nariale R
 5    NaN    2.9, 121.9, -578.2                   Nariale L
 6    NaN   18.5, 118.8, -565.0                Alare oseo R
 7    NaN   -6.4, 120.5, -566.9                Alare oseo L
 8    NaN   25.8, 132.4, -575.0              Alare blando R
 9    NaN  -11.3, 136.4, -575.0              Alare blando L
 10   NaN   17.5, 117.2, -559.5     Concha nasal anterior R
 11   NaN   -5.4, 115.9, -563.7     Concha nasal anterior L
 12   NaN    8.9, 127.9, -575.0           Subalare medial R
 13   NaN    2.1, 127.0, -575.0           Subalare medial L
 14   NaN   20.6, 131.0, -575.0          Subalare lateral R
 15   NaN   -5.7, 134.8, -575.0          Subalare lateral L
 16   NaN   11.4, 148.9, -575.0         Subalare anterior R
 17   NaN    4.4, 149.5, -575.0         Subalare anterior L
 18   NaN    5.3, 140.4, -544.7                     Rhinion
 19   NaN    5.3, 128.4, -522.5                      Nasion
 20   NaN   15.3, 125.5, -524.4               Nasomaxilar R
 21   NaN   -3.5, 126.1, -524.0               Nasomaxilar L
 22   NaN    16.3, 76.8, -527.8              Canal optico R
 23   NaN    -8.3, 82.8, -527.7              Canal optico L
 24   NaN     7.0, 67.3, -557.3                     Hormion
 25   NaN     6.9, 77.4, -572.8      Espina nasal posterior
 26   NaN    17.0, 82.3, -574.8            Coana anterior R
 27   NaN    -2.3, 81.9, -574.8            Coana anterior L
 28   NaN    15.1, 65.5, -552.1           Coana posterior R
 29   NaN    -1.6, 62.6, -553.1            Coana posterir L
 30   NaN    -8.2, 72.0, -567.1             Coana lateral R
 31   NaN    21.9, 72.3, -566.0             Coana lateral L
 32   NaN     6.8, 72.4, -566.0           Coana posterior R
 33   NaN     6.2, 71.8, -567.0           Coana posterior L
 34   NaN    -6.5, 79.1, -561.9    Concha nasal posterior R
 35   NaN    21.0, 78.1, -561.0    Concha nasal posterior L
 36   NaN   14.9, 128.6, -552.7            Nasal inferior R
 37   NaN   -3.9, 128.6, -555.0            Nasal inferior L
 38   NaN                   NaN                  Ruler List
 39   NaN             51.4 [mm]              Longitud total
 40   NaN              0.0 [mm]               Longtud media
 41   NaN    6.9, 102.6, -579.4                     Plano B
 42   NaN    6.6, 102.6, -577.3            Tabique inferior
 43   NaN   22.9, 102.6, -575.7              Piso lateral R
 44   NaN  -12.5, 102.6, -575.2              Piso lateral L
 45   NaN   26.3, 102.6, -561.3      Margen medio lateral R
 46   NaN  -12.4, 102.6, -563.4      Margen medio lateral L
 47   NaN    6.1, 102.6, -527.5               Tabique medio
 48   NaN    19.5, 77.6, -572.4    Piso posterior lateral R
 49   NaN    -6.9, 77.6, -572.3    Piso posterior lateral L
 50   NaN    21.3, 77.6, -561.4  Margen posterior lateral R
 51   NaN    -7.4, 77.6, -562.0  Margen posterior lateral L
 52   NaN     6.6, 77.6, -547.6           Tabique posterior]

In [ ]:
df_ = pd.read_stata?

In [ ]:
df_ = pd.read_hdf?

In [ ]:
#df_tx.to_excel('data/test_out.xls', header=False, index=False) # cuidado con límites de planillas de excel!

In [ ]:
df_excel.to_clipboard?

Conociendo las estructuras de Pandas

Series


In [10]:
mi_serie = pd.Series([3.4, 56.3, 56.1])
print mi_serie.values
print mi_serie.index
mi_serie


[  3.4  56.3  56.1]
Int64Index([0, 1, 2], dtype='int64')
Out[10]:
0     3.4
1    56.3
2    56.1
dtype: float64

In [11]:
mi_serie = pd.Series([3.4, 56.3, 56.1], index=['dist_1', 'dist_2', 'dist_3'])
print mi_serie.values
print mi_serie.index
print mi_serie['dist_2']
print mi_serie[['dist_1', 'dist_3']]
mi_serie


[  3.4  56.3  56.1]
Index([u'dist_1', u'dist_2', u'dist_3'], dtype='object')
56.3
dist_1     3.4
dist_3    56.1
dtype: float64
Out[11]:
dist_1     3.4
dist_2    56.3
dist_3    56.1
dtype: float64
Operaciones con Series

In [12]:
mi_serie[mi_serie > 4]


Out[12]:
dist_2    56.3
dist_3    56.1
dtype: float64

In [13]:
mi_serie * 2


Out[13]:
dist_1      6.8
dist_2    112.6
dist_3    112.2
dtype: float64

In [14]:
np.sqrt(mi_serie)


Out[14]:
dist_1    1.843909
dist_2    7.503333
dist_3    7.489993
dtype: float64

In [15]:
pd.notnull(mi_serie)


Out[15]:
dist_1    True
dist_2    True
dist_3    True
dtype: bool

In [16]:
pd.isnull(mi_serie)


Out[16]:
dist_1    False
dist_2    False
dist_3    False
dtype: bool

Series Temporales


In [17]:
dates_complete = pd.date_range('1/18/2013', '02/09/2014', freq='15T')
other_dates = pd.date_range('1/18/2013', '02/09/2014', freq='1D')
print dates_complete
print other_dates
mi_serie = pd.Series(other_dates)
mi_serie_med = pd.Series(dates_complete)


DatetimeIndex(['2013-01-18 00:00:00', '2013-01-18 00:15:00',
               '2013-01-18 00:30:00', '2013-01-18 00:45:00',
               '2013-01-18 01:00:00', '2013-01-18 01:15:00',
               '2013-01-18 01:30:00', '2013-01-18 01:45:00',
               '2013-01-18 02:00:00', '2013-01-18 02:15:00',
               ...
               '2014-02-08 21:45:00', '2014-02-08 22:00:00',
               '2014-02-08 22:15:00', '2014-02-08 22:30:00',
               '2014-02-08 22:45:00', '2014-02-08 23:00:00',
               '2014-02-08 23:15:00', '2014-02-08 23:30:00',
               '2014-02-08 23:45:00', '2014-02-09 00:00:00'],
              dtype='datetime64[ns]', length=37153, freq='15T')
DatetimeIndex(['2013-01-18', '2013-01-19', '2013-01-20', '2013-01-21',
               '2013-01-22', '2013-01-23', '2013-01-24', '2013-01-25',
               '2013-01-26', '2013-01-27',
               ...
               '2014-01-31', '2014-02-01', '2014-02-02', '2014-02-03',
               '2014-02-04', '2014-02-05', '2014-02-06', '2014-02-07',
               '2014-02-08', '2014-02-09'],
              dtype='datetime64[ns]', length=388, freq='D')

DataFrames


In [18]:
type(df_other_excel)


Out[18]:
pandas.core.frame.DataFrame

In [19]:
df_other_excel


Out[19]:
Fecha Evento Domicilio Localidad Latitud Longitud Cantidad Ambulancias Ambulancia Derivación Atenciones en Lugar Derivaciones a Hospital
0 2014-01-01 00:12:59 ATENCION MEDICA EN DOMICILIOS NEWTON 1800 BAHIA BLANCA -38,73008 -62,23579 NaN NaN NaN NaN NaN
1 2014-01-01 02:16:00 ACCIDENTE DE TRANSITO TERRADA esq. 17 DE MAYO BAHIA BLANCA -38,70583 -62,29642 NaN NaN NaN NaN NaN
2 2014-01-01 03:08:40 ACCIDENTE DE TRANSITO CASEROS 3300 BAHIA BLANCA -38,74588 -62,22757 NaN NaN NaN NaN NaN
3 2014-01-01 03:10:06 ATENCION MEDICA EN VIA PUBLICA CATAMARCA 1500 BAHIA BLANCA -38,71933 -62,30212 NaN NaN NaN NaN NaN
4 2014-01-01 03:29:06 ACCIDENTE DE TRANSITO DR ADRIAN MORADO VERES esq. JUJUY BAHIA BLANCA -38,71498 -62,31009 NaN NaN NaN NaN NaN
5 2014-01-01 03:56:44 ATENCION MEDICA EN DOMICILIOS MATHEU 6000 BAHIA BLANCA -38,72875 -62,23798 NaN NaN NaN NaN NaN
6 2014-01-01 04:12:58 ATENCION MEDICA EN VIA PUBLICA CNEL VIDAL 9000 BAHIA BLANCA -38,74818 -62,28333 NaN NaN NaN NaN NaN
7 2014-01-01 04:19:14 ATENCION MEDICA EN DOMICILIOS MATHEU 6000 BAHIA BLANCA -38,72875 -62,23798 NaN NaN NaN NaN NaN
8 2014-01-01 04:21:42 ATENCION MEDICA EN VIA PUBLICA CORONEL MARTINIANO RODRIGUEZ esq. MITRE BAHIA BLANCA -38,71425 -62,26583 NaN NaN NaN NaN NaN
9 2014-01-01 05:01:05 ATENCION MEDICA EN VIA PUBLICA LEOPOLDO LUGONES 0 BAHIA BLANCA -38,69112 -62,3053 NaN NaN NaN NaN NaN
10 2014-01-01 05:11:52 ATENCION MEDICA EN VIA PUBLICA PBRO JUAN BAUTISTA BIGIO 9000 BAHIA BLANCA -38,70475 -62,31227 NaN NaN NaN NaN NaN
11 2014-01-01 05:18:04 ATENCION MEDICA EN VIA PUBLICA RIO NEGRO 2400 BAHIA BLANCA -38,75256 -62,27807 NaN NaN NaN NaN NaN
12 2014-01-01 06:54:26 ATENCION MEDICA EN VIA PUBLICA FUERTE ARGENTINO 3000 BAHIA BLANCA -38,70533 -62,26081 NaN NaN NaN NaN NaN
13 2014-01-01 08:51:31 ACCIDENTE DE TRANSITO CABRERA 3900 BAHIA BLANCA -38,6940002441406 -62,2383575439453 NaN NaN NaN NaN NaN
14 2014-01-01 09:28:54 ATENCION MEDICA EN DOMICILIOS FRANCISCA HERNANDEZ 0 BAHIA BLANCA -38,67708 -62,32211 NaN NaN NaN NaN NaN
15 2014-01-01 10:43:52 ATENCION MEDICA EN DOMICILIOS ESTEBAN RIGAMONTI 3000 BAHIA BLANCA -38,68735 -62,29587 NaN NaN NaN NaN NaN
16 2014-01-01 11:07:58 ATENCION MEDICA EN DOMICILIOS RINCON 279 1 0 BAHIA BLANCA -38,71095 -62,25317 NaN NaN NaN NaN NaN
17 2014-01-01 11:58:29 RECLAMOS POR ARBOLES CAIDOS O RAMAS MENDOZA 0 BAHIA BLANCA -38,71096 -62,28166 NaN NaN NaN NaN NaN
18 2014-01-01 12:38:07 ATENCION MEDICA EN DOMICILIOS ENTRE RIOS 2100 BAHIA BLANCA -38,72744 -62,30508 NaN NaN NaN NaN NaN
19 2014-01-01 12:51:37 ATENCION MEDICA EN VIA PUBLICA AYACUCHO 3000 BAHIA BLANCA -38,69223 -62,29871 NaN NaN NaN NaN NaN
20 2014-01-01 13:01:25 ATENCION MEDICA EN DOMICILIOS TIERRA DEL FUEGO 1500 BAHIA BLANCA -38,74731 -62,26654 NaN NaN NaN NaN NaN
21 2014-01-01 14:40:51 ATENCION MEDICA EN DOMICILIOS GRAL NICOLAS LEVALLE 1800 BAHIA BLANCA -38,75501 -62,26722 NaN NaN NaN NaN NaN
22 2014-01-01 15:04:30 ATENCION MEDICA EN DOMICILIOS DON BOSCO 3000 BAHIA BLANCA -38,72717 -62,2875 NaN NaN NaN NaN NaN
23 2014-01-01 15:52:56 ATENCION MEDICA EN VIA PUBLICA 12 DE OCTUBRE 6000 BAHIA BLANCA -38,70318 -62,26633 NaN NaN NaN NaN NaN
24 2014-01-01 15:58:02 ATENCION MEDICA EN DOMICILIOS GRAL PABLO RICCHIERI 2400 BAHIA BLANCA -38,71692 -62,32015 NaN NaN NaN NaN NaN
25 2014-01-01 16:09:32 ATENCION MEDICA EN DOMICILIOS HUAURA 3000 BAHIA BLANCA -38,73853 -62,2314 NaN NaN NaN NaN NaN
26 2014-01-01 16:29:15 ATENCION MEDICA EN DOMICILIOS HUMBERTO PRIMO 3000 BAHIA BLANCA -38,70213 -62,27465 NaN NaN NaN NaN NaN
27 2014-01-01 16:49:20 ACCIDENTE DE TRANSITO CNEL ROSALES esq. D ORBIGNY BAHIA BLANCA -38,72409 -62,23008 NaN NaN NaN NaN NaN
28 2014-01-01 17:13:52 ATENCION MEDICA EN DOMICILIOS HONDURAS 2700 BAHIA BLANCA -38,75764 -62,27384 NaN NaN NaN NaN NaN
29 2014-01-01 17:19:15 ATENCION MEDICA EN DOMICILIOS DARREGUEIRA 6000 BAHIA BLANCA -38,7292 -62,26293 NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ...
6330 2014-05-21 09:48:48 RECLAMOS POR ARBOLES CAIDOS O RAMAS AVDA ALEM 3000 BAHIA BLANCA -38,70954 -62,26416 NaN NaN NaN NaN NaN
6331 2014-05-21 11:10:07 ATENCION MEDICA EN DOMICILIOS SALTA 0 BAHIA BLANCA -38,71064 -62,27452 NaN NaN NaN NaN NaN
6332 2014-05-21 11:33:10 ATENCION MEDICA EN DOMICILIOS GRAL MANUEL BELGRANO 3600 ING. WHITE -38,78465 -62,26603 NaN NaN NaN NaN NaN
6333 2014-05-21 11:46:09 ACCIDENTE DE TRANSITO ESTOMBA esq. CORONEL MARTIN RODRIGUEZ BAHIA BLANCA -38,70511 -62,28356 NaN NaN NaN NaN NaN
6334 2014-05-21 12:18:34 ACCIDENTE DE TRANSITO TUCUMAN esq. PTE ROCA BAHIA BLANCA -38,72084 -62,27851 NaN NaN NaN NaN NaN
6335 2014-05-21 13:07:43 ACCIDENTE DE TRANSITO NAPOSTA esq. HIPOLITO YRIGOYEN BAHIA BLANCA -38,70983 -62,25637 NaN NaN NaN NaN NaN
6336 2014-05-21 13:19:25 ACCIDENTE DE TRANSITO FLORIDA esq. GRAL ALVEAR BAHIA BLANCA -38,69252 -62,27166 NaN NaN NaN NaN NaN
6337 2014-05-21 13:42:09 ATENCION MEDICA EN DOMICILIOS PJE JERONIMO CALVENTO 1800 BAHIA BLANCA -38,7248 -62,3004 NaN NaN NaN NaN NaN
6338 2014-05-21 14:21:38 ATENCION MEDICA EN DOMICILIOS PAMPA CENTRAL 2100 BAHIA BLANCA -38,71496 -62,31219 NaN NaN NaN NaN NaN
6339 2014-05-21 14:40:44 ATENCION MEDICA EN DOMICILIOS 25 DE MAYO 9000 BAHIA BLANCA -38,73507 -62,2701 NaN NaN NaN NaN NaN
6340 2014-05-21 15:35:31 ATENCION MEDICA EN DOMICILIOS ÑANCU 0 BAHIA BLANCA -38,69466 -62,21915 NaN NaN NaN NaN NaN
6341 2014-05-21 16:03:22 ACCIDENTE DE TRANSITO AVDA ALEM 6000 BAHIA BLANCA -38,70844 -62,26504 NaN NaN NaN NaN NaN
6342 2014-05-21 16:05:40 ATENCION MEDICA EN DOMICILIOS 3 DE FEBRERO 2700 BAHIA BLANCA -38,73331 -62,22632 NaN NaN NaN NaN NaN
6343 2014-05-21 16:41:30 ACCIDENTE DE TRANSITO VALENTIN ALSINA esq. RUFINO ROJAS BAHIA BLANCA -38,70589 -62,24969 NaN NaN NaN NaN NaN
6344 2014-05-21 17:28:42 ACCIDENTE DE TRANSITO MENDOZA esq. BOLIVIA BAHIA BLANCA -38,72077 -62,29408 NaN NaN NaN NaN NaN
6345 2014-05-21 18:05:24 ACCIDENTE DE TRANSITO BELGRANO 3000 BAHIA BLANCA -38,71576 -62,2595 NaN NaN NaN NaN NaN
6346 2014-05-21 19:00:20 ATENCION MEDICA EN DOMICILIOS MAESTRO PICCIOLI 3600 ING. WHITE -38,77082 -62,27824 NaN NaN NaN NaN NaN
6347 2014-05-21 19:16:39 ACCIDENTE DE TRANSITO PABLO LEJARRAGA esq. 14 DE JULIO BAHIA BLANCA -38,71301 -62,2291 NaN NaN NaN NaN NaN
6348 2014-05-21 20:14:04 ATENCION MEDICA EN DOMICILIOS RIVADAVIA 2700 BAHIA BLANCA -38,73957 -62,23365 NaN NaN NaN NaN NaN
6349 2014-05-21 20:20:29 RECLAMOS POR HUMO EN ZONA INDUSTRIAL MAESTRO PICCIOLI 3600 ING. WHITE -38,77046 -62,27781 NaN NaN NaN NaN NaN
6350 2014-05-21 20:45:28 RECLAMOS POR BACHES EN VIA PUBLICA 12 DE OCTUBRE esq. CARONTI BAHIA BLANCA -38,70701 -62,26326 NaN NaN NaN NaN NaN
6351 2014-05-21 21:32:24 ATENCION MEDICA A PARTURIENTAS CORNELIO SAAVEDRA 3000 CERRI -38,71933 -62,39386 NaN NaN NaN NaN NaN
6352 2014-05-21 22:03:08 RECLAMOS POR ARBOLES CAIDOS O RAMAS RAFAEL OBLIGADO 5100 BAHIA BLANCA -38,75234 -62,18884 NaN NaN NaN NaN NaN
6353 2014-05-22 01:05:32 ATENCION MEDICA EN DOMICILIOS BELGRANO 9000 BAHIA BLANCA -38,71004 -62,25299 NaN NaN NaN NaN NaN
6354 2014-05-22 02:37:26 ATENCION MEDICA EN DOMICILIOS MARCOS MORA 3000 BAHIA BLANCA -38,74628 -62,22436 NaN NaN NaN NaN NaN
6355 2014-05-22 06:09:10 ACCIDENTE DE TRANSITO MARIANO NECOCHEA 0 BAHIA BLANCA -38,7385559082031 -62,2416229248047 NaN NaN NaN NaN NaN
6356 2014-05-22 06:36:25 ATENCION MEDICA EN COMISARIAS PTE ROCA 3000 BAHIA BLANCA -38,72046 -62,27804 NaN NaN NaN NaN NaN
6357 2014-05-22 07:19:18 ACCIDENTE DE TRANSITO AVDA ALEM esq. HIPOLITO YRIGOYEN BAHIA BLANCA -38,71361 -62,26112 NaN NaN NaN NaN NaN
6358 2014-05-22 09:09:33 ATENCION MEDICA EN DOMICILIOS CORONEL MARTINIANO RODRIGUEZ 3000 BAHIA BLANCA -38,71211 -62,26304 NaN NaN NaN NaN NaN
6359 2014-05-22 09:22:10 RECLAMOS POR ARBOLES CAIDOS O RAMAS HARDING GREEN 9000 BAHIA BLANCA -38,71972 -62,28791 NaN NaN NaN NaN NaN

6360 rows × 11 columns


In [20]:
df_other_excel[['Evento', 'Domicilio']]


Out[20]:
Evento Domicilio
0 ATENCION MEDICA EN DOMICILIOS NEWTON 1800
1 ACCIDENTE DE TRANSITO TERRADA esq. 17 DE MAYO
2 ACCIDENTE DE TRANSITO CASEROS 3300
3 ATENCION MEDICA EN VIA PUBLICA CATAMARCA 1500
4 ACCIDENTE DE TRANSITO DR ADRIAN MORADO VERES esq. JUJUY
5 ATENCION MEDICA EN DOMICILIOS MATHEU 6000
6 ATENCION MEDICA EN VIA PUBLICA CNEL VIDAL 9000
7 ATENCION MEDICA EN DOMICILIOS MATHEU 6000
8 ATENCION MEDICA EN VIA PUBLICA CORONEL MARTINIANO RODRIGUEZ esq. MITRE
9 ATENCION MEDICA EN VIA PUBLICA LEOPOLDO LUGONES 0
10 ATENCION MEDICA EN VIA PUBLICA PBRO JUAN BAUTISTA BIGIO 9000
11 ATENCION MEDICA EN VIA PUBLICA RIO NEGRO 2400
12 ATENCION MEDICA EN VIA PUBLICA FUERTE ARGENTINO 3000
13 ACCIDENTE DE TRANSITO CABRERA 3900
14 ATENCION MEDICA EN DOMICILIOS FRANCISCA HERNANDEZ 0
15 ATENCION MEDICA EN DOMICILIOS ESTEBAN RIGAMONTI 3000
16 ATENCION MEDICA EN DOMICILIOS RINCON 279 1 0
17 RECLAMOS POR ARBOLES CAIDOS O RAMAS MENDOZA 0
18 ATENCION MEDICA EN DOMICILIOS ENTRE RIOS 2100
19 ATENCION MEDICA EN VIA PUBLICA AYACUCHO 3000
20 ATENCION MEDICA EN DOMICILIOS TIERRA DEL FUEGO 1500
21 ATENCION MEDICA EN DOMICILIOS GRAL NICOLAS LEVALLE 1800
22 ATENCION MEDICA EN DOMICILIOS DON BOSCO 3000
23 ATENCION MEDICA EN VIA PUBLICA 12 DE OCTUBRE 6000
24 ATENCION MEDICA EN DOMICILIOS GRAL PABLO RICCHIERI 2400
25 ATENCION MEDICA EN DOMICILIOS HUAURA 3000
26 ATENCION MEDICA EN DOMICILIOS HUMBERTO PRIMO 3000
27 ACCIDENTE DE TRANSITO CNEL ROSALES esq. D ORBIGNY
28 ATENCION MEDICA EN DOMICILIOS HONDURAS 2700
29 ATENCION MEDICA EN DOMICILIOS DARREGUEIRA 6000
... ... ...
6330 RECLAMOS POR ARBOLES CAIDOS O RAMAS AVDA ALEM 3000
6331 ATENCION MEDICA EN DOMICILIOS SALTA 0
6332 ATENCION MEDICA EN DOMICILIOS GRAL MANUEL BELGRANO 3600
6333 ACCIDENTE DE TRANSITO ESTOMBA esq. CORONEL MARTIN RODRIGUEZ
6334 ACCIDENTE DE TRANSITO TUCUMAN esq. PTE ROCA
6335 ACCIDENTE DE TRANSITO NAPOSTA esq. HIPOLITO YRIGOYEN
6336 ACCIDENTE DE TRANSITO FLORIDA esq. GRAL ALVEAR
6337 ATENCION MEDICA EN DOMICILIOS PJE JERONIMO CALVENTO 1800
6338 ATENCION MEDICA EN DOMICILIOS PAMPA CENTRAL 2100
6339 ATENCION MEDICA EN DOMICILIOS 25 DE MAYO 9000
6340 ATENCION MEDICA EN DOMICILIOS ÑANCU 0
6341 ACCIDENTE DE TRANSITO AVDA ALEM 6000
6342 ATENCION MEDICA EN DOMICILIOS 3 DE FEBRERO 2700
6343 ACCIDENTE DE TRANSITO VALENTIN ALSINA esq. RUFINO ROJAS
6344 ACCIDENTE DE TRANSITO MENDOZA esq. BOLIVIA
6345 ACCIDENTE DE TRANSITO BELGRANO 3000
6346 ATENCION MEDICA EN DOMICILIOS MAESTRO PICCIOLI 3600
6347 ACCIDENTE DE TRANSITO PABLO LEJARRAGA esq. 14 DE JULIO
6348 ATENCION MEDICA EN DOMICILIOS RIVADAVIA 2700
6349 RECLAMOS POR HUMO EN ZONA INDUSTRIAL MAESTRO PICCIOLI 3600
6350 RECLAMOS POR BACHES EN VIA PUBLICA 12 DE OCTUBRE esq. CARONTI
6351 ATENCION MEDICA A PARTURIENTAS CORNELIO SAAVEDRA 3000
6352 RECLAMOS POR ARBOLES CAIDOS O RAMAS RAFAEL OBLIGADO 5100
6353 ATENCION MEDICA EN DOMICILIOS BELGRANO 9000
6354 ATENCION MEDICA EN DOMICILIOS MARCOS MORA 3000
6355 ACCIDENTE DE TRANSITO MARIANO NECOCHEA 0
6356 ATENCION MEDICA EN COMISARIAS PTE ROCA 3000
6357 ACCIDENTE DE TRANSITO AVDA ALEM esq. HIPOLITO YRIGOYEN
6358 ATENCION MEDICA EN DOMICILIOS CORONEL MARTINIANO RODRIGUEZ 3000
6359 RECLAMOS POR ARBOLES CAIDOS O RAMAS HARDING GREEN 9000

6360 rows × 2 columns


In [21]:
print df_txt.columns
df_txt.columns = ['medidor', 'date', 'value']
print df_txt.columns
df_txt
df_txt['medidor'].drop_duplicates()


Int64Index([0, 1, 2], dtype='int64')
Index([u'medidor', u'date', u'value'], dtype='object')
Out[21]:
0       8
1      15
2      16
3      17
4      18
5      20
6       6
7       5
9       7
11      9
12     10
13     13
14     14
773    19
775    21
776    22
795    23
796    24
834    11
835    12
Name: medidor, dtype: int64

In [22]:
df_txt.ix[:4, :3]


Out[22]:
medidor date value
0 8 2013-11-28 11:00:00 0
1 15 2013-11-28 11:00:00 692
2 16 2013-11-28 11:00:00 367
3 17 2013-11-28 11:00:00 0
4 18 2013-11-28 11:00:00 0

In [23]:
df_txt['nueva_col'] = np.arange(len(df_txt['medidor']))
df_txt


Out[23]:
medidor date value nueva_col
0 8 2013-11-28 11:00:00 0 0
1 15 2013-11-28 11:00:00 692 1
2 16 2013-11-28 11:00:00 367 2
3 17 2013-11-28 11:00:00 0 3
4 18 2013-11-28 11:00:00 0 4
5 20 2013-12-01 09:30:00 71 5
6 6 2013-12-05 15:45:00 0 6
7 5 2013-11-18 22:15:00 29705 7
8 6 2013-11-18 22:15:00 15364 8
9 7 2013-11-18 22:15:00 41482 9
10 8 2013-11-18 22:15:00 52487 10
11 9 2013-11-18 22:15:00 0 11
12 10 2013-11-18 22:15:00 0 12
13 13 2013-11-18 22:15:00 22792 13
14 14 2013-11-18 22:15:00 18182 14
15 15 2013-11-18 22:15:00 0 15
16 16 2013-11-18 22:15:00 0 16
17 17 2013-11-18 22:15:00 22273 17
18 18 2013-11-18 22:15:00 31488 18
19 7 2013-11-18 22:30:00 64266 19
20 8 2013-11-18 22:30:00 41479 20
21 9 2013-11-18 22:30:00 0 21
22 10 2013-11-18 22:30:00 0 22
23 13 2013-11-18 22:30:00 14344 23
24 14 2013-11-18 22:30:00 11270 24
25 15 2013-11-18 22:30:00 0 25
26 16 2013-11-18 22:30:00 0 26
27 17 2013-11-18 22:30:00 20481 27
28 18 2013-11-18 22:30:00 29696 28
29 5 2013-11-18 22:30:00 24585 29
... ... ... ... ...
129691 20 2014-02-09 12:45:00 105 129691
129692 12 2014-02-09 11:30:00 0 129692
129693 9 2014-02-09 11:30:00 0 129693
129694 10 2014-02-09 11:30:00 0 129694
129695 9 2014-02-09 11:45:00 0 129695
129696 10 2014-02-09 11:45:00 0 129696
129697 7 2014-02-09 12:00:00 0 129697
129698 8 2014-02-09 12:00:00 0 129698
129699 5 2014-02-09 12:00:00 0 129699
129700 6 2014-02-09 12:00:00 0 129700
129701 15 2014-02-09 12:00:00 598 129701
129702 16 2014-02-09 12:00:00 304 129702
129703 19 2014-02-09 12:00:00 187 129703
129704 20 2014-02-09 12:00:00 98 129704
129705 17 2014-02-09 12:00:00 0 129705
129706 18 2014-02-09 12:00:00 0 129706
129707 9 2014-02-09 12:15:00 0 129707
129708 10 2014-02-09 12:15:00 0 129708
129709 5 2014-02-09 12:30:00 0 129709
129710 6 2014-02-09 12:30:00 0 129710
129711 15 2014-02-09 12:30:00 616 129711
129712 16 2014-02-09 12:30:00 309 129712
129713 19 2014-02-09 12:30:00 199 129713
129714 20 2014-02-09 12:30:00 104 129714
129715 17 2014-02-09 12:30:00 0 129715
129716 18 2014-02-09 12:30:00 0 129716
129717 11 2014-02-09 12:30:00 0 129717
129718 12 2014-02-09 12:30:00 0 129718
129719 23 2014-02-09 12:45:00 338 129719
129720 24 2014-02-09 12:45:00 227 129720

129721 rows × 4 columns


In [24]:
df_txt.T


Out[24]:
0 1 2 3 4 5 6 7 8 9 ... 129711 129712 129713 129714 129715 129716 129717 129718 129719 129720
medidor 8 15 16 17 18 20 6 5 6 7 ... 15 16 19 20 17 18 11 12 23 24
date 2013-11-28 11:00:00 2013-11-28 11:00:00 2013-11-28 11:00:00 2013-11-28 11:00:00 2013-11-28 11:00:00 2013-12-01 09:30:00 2013-12-05 15:45:00 2013-11-18 22:15:00 2013-11-18 22:15:00 2013-11-18 22:15:00 ... 2014-02-09 12:30:00 2014-02-09 12:30:00 2014-02-09 12:30:00 2014-02-09 12:30:00 2014-02-09 12:30:00 2014-02-09 12:30:00 2014-02-09 12:30:00 2014-02-09 12:30:00 2014-02-09 12:45:00 2014-02-09 12:45:00
value 0 692 367 0 0 71 0 29705 15364 41482 ... 616 309 199 104 0 0 0 0 338 227
nueva_col 0 1 2 3 4 5 6 7 8 9 ... 129711 129712 129713 129714 129715 129716 129717 129718 129719 129720

4 rows × 129721 columns


In [25]:
df_txt.values


Out[25]:
array([[8, Timestamp('2013-11-28 11:00:00'), 0, 0],
       [15, Timestamp('2013-11-28 11:00:00'), 692, 1],
       [16, Timestamp('2013-11-28 11:00:00'), 367, 2],
       ..., 
       [12, Timestamp('2014-02-09 12:30:00'), 0, 129718],
       [23, Timestamp('2014-02-09 12:45:00'), 338, 129719],
       [24, Timestamp('2014-02-09 12:45:00'), 227, 129720]], dtype=object)

In [26]:
df_txt.reindex(columns=['medidor', 'date', 'value'])


Out[26]:
medidor date value
0 8 2013-11-28 11:00:00 0
1 15 2013-11-28 11:00:00 692
2 16 2013-11-28 11:00:00 367
3 17 2013-11-28 11:00:00 0
4 18 2013-11-28 11:00:00 0
5 20 2013-12-01 09:30:00 71
6 6 2013-12-05 15:45:00 0
7 5 2013-11-18 22:15:00 29705
8 6 2013-11-18 22:15:00 15364
9 7 2013-11-18 22:15:00 41482
10 8 2013-11-18 22:15:00 52487
11 9 2013-11-18 22:15:00 0
12 10 2013-11-18 22:15:00 0
13 13 2013-11-18 22:15:00 22792
14 14 2013-11-18 22:15:00 18182
15 15 2013-11-18 22:15:00 0
16 16 2013-11-18 22:15:00 0
17 17 2013-11-18 22:15:00 22273
18 18 2013-11-18 22:15:00 31488
19 7 2013-11-18 22:30:00 64266
20 8 2013-11-18 22:30:00 41479
21 9 2013-11-18 22:30:00 0
22 10 2013-11-18 22:30:00 0
23 13 2013-11-18 22:30:00 14344
24 14 2013-11-18 22:30:00 11270
25 15 2013-11-18 22:30:00 0
26 16 2013-11-18 22:30:00 0
27 17 2013-11-18 22:30:00 20481
28 18 2013-11-18 22:30:00 29696
29 5 2013-11-18 22:30:00 24585
... ... ... ...
129691 20 2014-02-09 12:45:00 105
129692 12 2014-02-09 11:30:00 0
129693 9 2014-02-09 11:30:00 0
129694 10 2014-02-09 11:30:00 0
129695 9 2014-02-09 11:45:00 0
129696 10 2014-02-09 11:45:00 0
129697 7 2014-02-09 12:00:00 0
129698 8 2014-02-09 12:00:00 0
129699 5 2014-02-09 12:00:00 0
129700 6 2014-02-09 12:00:00 0
129701 15 2014-02-09 12:00:00 598
129702 16 2014-02-09 12:00:00 304
129703 19 2014-02-09 12:00:00 187
129704 20 2014-02-09 12:00:00 98
129705 17 2014-02-09 12:00:00 0
129706 18 2014-02-09 12:00:00 0
129707 9 2014-02-09 12:15:00 0
129708 10 2014-02-09 12:15:00 0
129709 5 2014-02-09 12:30:00 0
129710 6 2014-02-09 12:30:00 0
129711 15 2014-02-09 12:30:00 616
129712 16 2014-02-09 12:30:00 309
129713 19 2014-02-09 12:30:00 199
129714 20 2014-02-09 12:30:00 104
129715 17 2014-02-09 12:30:00 0
129716 18 2014-02-09 12:30:00 0
129717 11 2014-02-09 12:30:00 0
129718 12 2014-02-09 12:30:00 0
129719 23 2014-02-09 12:45:00 338
129720 24 2014-02-09 12:45:00 227

129721 rows × 3 columns


In [27]:
df_txt.drop([0, 1])


Out[27]:
medidor date value nueva_col
2 16 2013-11-28 11:00:00 367 2
3 17 2013-11-28 11:00:00 0 3
4 18 2013-11-28 11:00:00 0 4
5 20 2013-12-01 09:30:00 71 5
6 6 2013-12-05 15:45:00 0 6
7 5 2013-11-18 22:15:00 29705 7
8 6 2013-11-18 22:15:00 15364 8
9 7 2013-11-18 22:15:00 41482 9
10 8 2013-11-18 22:15:00 52487 10
11 9 2013-11-18 22:15:00 0 11
12 10 2013-11-18 22:15:00 0 12
13 13 2013-11-18 22:15:00 22792 13
14 14 2013-11-18 22:15:00 18182 14
15 15 2013-11-18 22:15:00 0 15
16 16 2013-11-18 22:15:00 0 16
17 17 2013-11-18 22:15:00 22273 17
18 18 2013-11-18 22:15:00 31488 18
19 7 2013-11-18 22:30:00 64266 19
20 8 2013-11-18 22:30:00 41479 20
21 9 2013-11-18 22:30:00 0 21
22 10 2013-11-18 22:30:00 0 22
23 13 2013-11-18 22:30:00 14344 23
24 14 2013-11-18 22:30:00 11270 24
25 15 2013-11-18 22:30:00 0 25
26 16 2013-11-18 22:30:00 0 26
27 17 2013-11-18 22:30:00 20481 27
28 18 2013-11-18 22:30:00 29696 28
29 5 2013-11-18 22:30:00 24585 29
30 6 2013-11-18 22:30:00 15364 30
31 7 2013-11-18 22:45:00 60170 31
... ... ... ... ...
129691 20 2014-02-09 12:45:00 105 129691
129692 12 2014-02-09 11:30:00 0 129692
129693 9 2014-02-09 11:30:00 0 129693
129694 10 2014-02-09 11:30:00 0 129694
129695 9 2014-02-09 11:45:00 0 129695
129696 10 2014-02-09 11:45:00 0 129696
129697 7 2014-02-09 12:00:00 0 129697
129698 8 2014-02-09 12:00:00 0 129698
129699 5 2014-02-09 12:00:00 0 129699
129700 6 2014-02-09 12:00:00 0 129700
129701 15 2014-02-09 12:00:00 598 129701
129702 16 2014-02-09 12:00:00 304 129702
129703 19 2014-02-09 12:00:00 187 129703
129704 20 2014-02-09 12:00:00 98 129704
129705 17 2014-02-09 12:00:00 0 129705
129706 18 2014-02-09 12:00:00 0 129706
129707 9 2014-02-09 12:15:00 0 129707
129708 10 2014-02-09 12:15:00 0 129708
129709 5 2014-02-09 12:30:00 0 129709
129710 6 2014-02-09 12:30:00 0 129710
129711 15 2014-02-09 12:30:00 616 129711
129712 16 2014-02-09 12:30:00 309 129712
129713 19 2014-02-09 12:30:00 199 129713
129714 20 2014-02-09 12:30:00 104 129714
129715 17 2014-02-09 12:30:00 0 129715
129716 18 2014-02-09 12:30:00 0 129716
129717 11 2014-02-09 12:30:00 0 129717
129718 12 2014-02-09 12:30:00 0 129718
129719 23 2014-02-09 12:45:00 338 129719
129720 24 2014-02-09 12:45:00 227 129720

129719 rows × 4 columns


In [28]:
df_txt.drop(['value'], axis=1)


Out[28]:
medidor date nueva_col
0 8 2013-11-28 11:00:00 0
1 15 2013-11-28 11:00:00 1
2 16 2013-11-28 11:00:00 2
3 17 2013-11-28 11:00:00 3
4 18 2013-11-28 11:00:00 4
5 20 2013-12-01 09:30:00 5
6 6 2013-12-05 15:45:00 6
7 5 2013-11-18 22:15:00 7
8 6 2013-11-18 22:15:00 8
9 7 2013-11-18 22:15:00 9
10 8 2013-11-18 22:15:00 10
11 9 2013-11-18 22:15:00 11
12 10 2013-11-18 22:15:00 12
13 13 2013-11-18 22:15:00 13
14 14 2013-11-18 22:15:00 14
15 15 2013-11-18 22:15:00 15
16 16 2013-11-18 22:15:00 16
17 17 2013-11-18 22:15:00 17
18 18 2013-11-18 22:15:00 18
19 7 2013-11-18 22:30:00 19
20 8 2013-11-18 22:30:00 20
21 9 2013-11-18 22:30:00 21
22 10 2013-11-18 22:30:00 22
23 13 2013-11-18 22:30:00 23
24 14 2013-11-18 22:30:00 24
25 15 2013-11-18 22:30:00 25
26 16 2013-11-18 22:30:00 26
27 17 2013-11-18 22:30:00 27
28 18 2013-11-18 22:30:00 28
29 5 2013-11-18 22:30:00 29
... ... ... ...
129691 20 2014-02-09 12:45:00 129691
129692 12 2014-02-09 11:30:00 129692
129693 9 2014-02-09 11:30:00 129693
129694 10 2014-02-09 11:30:00 129694
129695 9 2014-02-09 11:45:00 129695
129696 10 2014-02-09 11:45:00 129696
129697 7 2014-02-09 12:00:00 129697
129698 8 2014-02-09 12:00:00 129698
129699 5 2014-02-09 12:00:00 129699
129700 6 2014-02-09 12:00:00 129700
129701 15 2014-02-09 12:00:00 129701
129702 16 2014-02-09 12:00:00 129702
129703 19 2014-02-09 12:00:00 129703
129704 20 2014-02-09 12:00:00 129704
129705 17 2014-02-09 12:00:00 129705
129706 18 2014-02-09 12:00:00 129706
129707 9 2014-02-09 12:15:00 129707
129708 10 2014-02-09 12:15:00 129708
129709 5 2014-02-09 12:30:00 129709
129710 6 2014-02-09 12:30:00 129710
129711 15 2014-02-09 12:30:00 129711
129712 16 2014-02-09 12:30:00 129712
129713 19 2014-02-09 12:30:00 129713
129714 20 2014-02-09 12:30:00 129714
129715 17 2014-02-09 12:30:00 129715
129716 18 2014-02-09 12:30:00 129716
129717 11 2014-02-09 12:30:00 129717
129718 12 2014-02-09 12:30:00 129718
129719 23 2014-02-09 12:45:00 129719
129720 24 2014-02-09 12:45:00 129720

129721 rows × 3 columns


In [29]:
df1 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bcd'))
df1


Out[29]:
b c d
0 0 1 2
1 3 4 5
2 6 7 8
3 9 10 11

In [30]:
df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=['b', 'd', 'e'])
df2


Out[30]:
b d e
0 0 1 2
1 3 4 5
2 6 7 8
3 9 10 11

In [31]:
suma_nan = df1 + df2
suma_nan


Out[31]:
b c d e
0 0 NaN 3 NaN
1 6 NaN 9 NaN
2 12 NaN 15 NaN
3 18 NaN 21 NaN

In [32]:
df1.add(df2, fill_value=0) #cambiar el cero para ver como afecta a las funciones


Out[32]:
b c d e
0 0 1 3 2
1 6 4 9 5
2 12 7 15 8
3 18 10 21 11

In [33]:
df1.div(df2, fill_value=1)


Out[33]:
b c d e
0 NaN 1 2.000000 0.500000
1 1 4 1.250000 0.200000
2 1 7 1.142857 0.125000
3 1 10 1.100000 0.090909

In [34]:
df1.apply(lambda x: x + 1)


Out[34]:
b c d
0 1 2 3
1 4 5 6
2 7 8 9
3 10 11 12

In [35]:
def mi_incremento(x):
    return x + 1

df1['b'].apply(mi_incremento)


Out[35]:
0     1
1     4
2     7
3    10
Name: b, dtype: float64

Ordenando y 'Rankeando' en Pandas


In [36]:
df1.sort_index(ascending=False, axis=1) # ver como cambia el orden cambiando axis


Out[36]:
d c b
0 2 1 0
1 5 4 3
2 8 7 6
3 11 10 9

In [38]:
df1['b'].sort_values()


Out[38]:
0    0
1    3
2    6
3    9
Name: b, dtype: float64

In [39]:
df1.rank(method='max', ascending=False) #method puede ser average, min, max o first
#df1


Out[39]:
b c d
0 4 4 4
1 3 3 3
2 2 2 2
3 1 1 1

Obteniendo información de nuestros DataFrames


In [40]:
df_txt.describe()


Out[40]:
medidor value nueva_col
count 129721.000000 129721.000000 129721.000000
mean 14.825263 7426.511374 64860.000000
std 6.074758 136264.699673 37447.371472
min 5.000000 0.000000 0.000000
25% 9.000000 0.000000 32430.000000
50% 16.000000 0.000000 64860.000000
75% 20.000000 344.000000 97290.000000
max 24.000000 6842516.000000 129720.000000

In [43]:
df_txt['value'].pct_change()


Out[43]:
0              NaN
1              inf
2        -0.469653
3        -1.000000
4              NaN
5              inf
6        -1.000000
7              inf
8        -0.482781
9         1.699948
10        0.265296
11       -1.000000
12             NaN
13             inf
14       -0.202264
15       -1.000000
16             NaN
17             inf
18        0.413730
19        1.040968
20       -0.354573
21       -1.000000
22             NaN
23             inf
24       -0.214306
25       -1.000000
26             NaN
27             inf
28        0.449929
29       -0.172111
            ...   
129691   -0.464286
129692   -1.000000
129693         NaN
129694         NaN
129695         NaN
129696         NaN
129697         NaN
129698         NaN
129699         NaN
129700         NaN
129701         inf
129702   -0.491639
129703   -0.384868
129704   -0.475936
129705   -1.000000
129706         NaN
129707         NaN
129708         NaN
129709         NaN
129710         NaN
129711         inf
129712   -0.498377
129713   -0.355987
129714   -0.477387
129715   -1.000000
129716         NaN
129717         NaN
129718         NaN
129719         inf
129720   -0.328402
Name: value, dtype: float64

In [44]:
df_txt['value'].pct_change


Out[44]:
<bound method Series.pct_change of 0             0
1           692
2           367
3             0
4             0
5            71
6             0
7         29705
8         15364
9         41482
10        52487
11            0
12            0
13        22792
14        18182
15            0
16            0
17        22273
18        31488
19        64266
20        41479
21            0
22            0
23        14344
24        11270
25            0
26            0
27        20481
28        29696
29        24585
          ...  
129691      105
129692        0
129693        0
129694        0
129695        0
129696        0
129697        0
129698        0
129699        0
129700        0
129701      598
129702      304
129703      187
129704       98
129705        0
129706        0
129707        0
129708        0
129709        0
129710        0
129711      616
129712      309
129713      199
129714      104
129715        0
129716        0
129717        0
129718        0
129719      338
129720      227
Name: value, dtype: int64>

In [45]:
df_txt['value'].corr(df_txt['medidor'])


Out[45]:
0.041885128928345339

In [46]:
df_txt.corr()


Out[46]:
medidor value nueva_col
medidor 1.000000 0.041885 -0.055407
value 0.041885 1.000000 0.011713
nueva_col -0.055407 0.011713 1.000000

Manejando Datos Perdidos


In [47]:
suma_nan.isnull()


Out[47]:
b c d e
0 False True False True
1 False True False True
2 False True False True
3 False True False True

In [48]:
test_1 = suma_nan.fillna(10)
test_1


Out[48]:
b c d e
0 0 10 3 10
1 6 10 9 10
2 12 10 15 10
3 18 10 21 10

In [49]:
test_2 = suma_nan.interpolate(method='linear', axis=1) #cambiar axis y ver que sucede
test_2


Out[49]:
b c d e
0 0 1.5 3 3
1 6 7.5 9 9
2 12 13.5 15 15
3 18 19.5 21 21

In [ ]:
suma_nan.interpolate?

In [51]:
import scipy.stats as stats

print stats.ttest_1samp(df_txt['value'], popmean=df_txt['value'].mean())


Ttest_1sampResult(statistic=0.0, pvalue=1.0)

Merge de DataFrames en Pandas


In [52]:
print test_1
print test_2
merged = pd.merge(test_1, test_2, on='d', how='outer')
merged
#left: use only keys from left frame (SQL: left outer join)
#right: use only keys from right frame (SQL: right outer join)
#outer: use union of keys from both frames (SQL: full outer join)
#inner: use intersection of keys from both frames (SQL: inner join)


    b   c   d   e
0   0  10   3  10
1   6  10   9  10
2  12  10  15  10
3  18  10  21  10
    b     c   d   e
0   0   1.5   3   3
1   6   7.5   9   9
2  12  13.5  15  15
3  18  19.5  21  21
Out[52]:
b_x c_x d e_x b_y c_y e_y
0 0 10 3 10 0 1.5 3
1 6 10 9 10 6 7.5 9
2 12 10 15 10 12 13.5 15
3 18 10 21 10 18 19.5 21

In [53]:
merged = pd.merge(test_1, test_2, on=['d', 'b', 'c', 'e'], how='inner')
merged


Out[53]:
b c d e

In [54]:
merged = pd.merge(test_1, test_2, on=['d', 'b', 'c', 'e'], how='outer')
merged


Out[54]:
b c d e
0 0 10.0 3 10
1 6 10.0 9 10
2 12 10.0 15 10
3 18 10.0 21 10
4 0 1.5 3 3
5 6 7.5 9 9
6 12 13.5 15 15
7 18 19.5 21 21

In [55]:
mi_serie_df = mi_serie_med.to_frame()
mi_serie_df.columns = ['date']
merged = pd.merge(df_txt, mi_serie_df, on='date', how='outer')
merged
print mi_serie_df.count()
print df_txt.count()
print merged.count()


date    37153
dtype: int64
medidor      129721
date         129721
value        129721
nueva_col    129721
dtype: int64
medidor      129721
date         159589
value        129721
nueva_col    129721
dtype: int64

In [56]:
merged_inter = merged.interpolate()
merged_inter.count()


Out[56]:
medidor      159589
date         159589
value        159589
nueva_col    159589
dtype: int64

In [57]:
pd.concat([df_txt['date'], df_other_excel['Evento']], axis=1) #notar que ocurre al cambiar el axis


Out[57]:
date Evento
0 2013-11-28 11:00:00 ATENCION MEDICA EN DOMICILIOS
1 2013-11-28 11:00:00 ACCIDENTE DE TRANSITO
2 2013-11-28 11:00:00 ACCIDENTE DE TRANSITO
3 2013-11-28 11:00:00 ATENCION MEDICA EN VIA PUBLICA
4 2013-11-28 11:00:00 ACCIDENTE DE TRANSITO
5 2013-12-01 09:30:00 ATENCION MEDICA EN DOMICILIOS
6 2013-12-05 15:45:00 ATENCION MEDICA EN VIA PUBLICA
7 2013-11-18 22:15:00 ATENCION MEDICA EN DOMICILIOS
8 2013-11-18 22:15:00 ATENCION MEDICA EN VIA PUBLICA
9 2013-11-18 22:15:00 ATENCION MEDICA EN VIA PUBLICA
10 2013-11-18 22:15:00 ATENCION MEDICA EN VIA PUBLICA
11 2013-11-18 22:15:00 ATENCION MEDICA EN VIA PUBLICA
12 2013-11-18 22:15:00 ATENCION MEDICA EN VIA PUBLICA
13 2013-11-18 22:15:00 ACCIDENTE DE TRANSITO
14 2013-11-18 22:15:00 ATENCION MEDICA EN DOMICILIOS
15 2013-11-18 22:15:00 ATENCION MEDICA EN DOMICILIOS
16 2013-11-18 22:15:00 ATENCION MEDICA EN DOMICILIOS
17 2013-11-18 22:15:00 RECLAMOS POR ARBOLES CAIDOS O RAMAS
18 2013-11-18 22:15:00 ATENCION MEDICA EN DOMICILIOS
19 2013-11-18 22:30:00 ATENCION MEDICA EN VIA PUBLICA
20 2013-11-18 22:30:00 ATENCION MEDICA EN DOMICILIOS
21 2013-11-18 22:30:00 ATENCION MEDICA EN DOMICILIOS
22 2013-11-18 22:30:00 ATENCION MEDICA EN DOMICILIOS
23 2013-11-18 22:30:00 ATENCION MEDICA EN VIA PUBLICA
24 2013-11-18 22:30:00 ATENCION MEDICA EN DOMICILIOS
25 2013-11-18 22:30:00 ATENCION MEDICA EN DOMICILIOS
26 2013-11-18 22:30:00 ATENCION MEDICA EN DOMICILIOS
27 2013-11-18 22:30:00 ACCIDENTE DE TRANSITO
28 2013-11-18 22:30:00 ATENCION MEDICA EN DOMICILIOS
29 2013-11-18 22:30:00 ATENCION MEDICA EN DOMICILIOS
... ... ...
129691 2014-02-09 12:45:00 NaN
129692 2014-02-09 11:30:00 NaN
129693 2014-02-09 11:30:00 NaN
129694 2014-02-09 11:30:00 NaN
129695 2014-02-09 11:45:00 NaN
129696 2014-02-09 11:45:00 NaN
129697 2014-02-09 12:00:00 NaN
129698 2014-02-09 12:00:00 NaN
129699 2014-02-09 12:00:00 NaN
129700 2014-02-09 12:00:00 NaN
129701 2014-02-09 12:00:00 NaN
129702 2014-02-09 12:00:00 NaN
129703 2014-02-09 12:00:00 NaN
129704 2014-02-09 12:00:00 NaN
129705 2014-02-09 12:00:00 NaN
129706 2014-02-09 12:00:00 NaN
129707 2014-02-09 12:15:00 NaN
129708 2014-02-09 12:15:00 NaN
129709 2014-02-09 12:30:00 NaN
129710 2014-02-09 12:30:00 NaN
129711 2014-02-09 12:30:00 NaN
129712 2014-02-09 12:30:00 NaN
129713 2014-02-09 12:30:00 NaN
129714 2014-02-09 12:30:00 NaN
129715 2014-02-09 12:30:00 NaN
129716 2014-02-09 12:30:00 NaN
129717 2014-02-09 12:30:00 NaN
129718 2014-02-09 12:30:00 NaN
129719 2014-02-09 12:45:00 NaN
129720 2014-02-09 12:45:00 NaN

129721 rows × 2 columns


In [58]:
print suma_nan
print df1
suma_nan.combine_first(df1)


    b   c   d   e
0   0 NaN   3 NaN
1   6 NaN   9 NaN
2  12 NaN  15 NaN
3  18 NaN  21 NaN
   b   c   d
0  0   1   2
1  3   4   5
2  6   7   8
3  9  10  11
Out[58]:
b c d e
0 0 1 3 NaN
1 6 4 9 NaN
2 12 7 15 NaN
3 18 10 21 NaN

In [ ]: